library(rstan)
## Warning: package 'rstan' was built under R version 4.3.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.3.3
## 
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(rstanarm)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.3
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
## 
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
## 
##     loo
library(bayesplot)
## Warning: package 'bayesplot' was built under R version 4.3.3
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(tidyr) # for pivot_longer
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:rstan':
## 
##     extract
library(dplyr) # for %>%
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(truncnorm) # for rnorm with minimum and maximum values
library(loo)
## Warning: package 'loo' was built under R version 4.3.3
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
## 
##     loo
library(lubridate) # for simplifying working with dates
## Warning: package 'lubridate' was built under R version 4.3.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Custom nine-colour palette — one well-separated hue per nation, chosen
# because the stock palettes do not distinguish nine groups clearly enough
custom_palette <- c(
  "#9F002E", "#B23AEE", "#FF50FF",
  "#FF7F00", "#FFB900", "#00EEEE",
  "#4EEE94", "#458B00", "#4876FF"
)

# Load the dataset
#install.packages("mlmRev")
library(mlmRev)
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.3.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Mmmec: county-level malignant melanoma deaths in the European Community
# (presumably; see ?mlmRev::Mmmec for the exact description)
data("Mmmec")

# Check for missing values — all six columns are complete (zero NAs)
colSums(is.na(Mmmec))
##   nation   region   county   deaths expected      uvb 
##        0        0        0        0        0        0
# Summarize statistics about deaths and uvb overall
summary(Mmmec$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    8.00   14.50   27.83   31.00  313.00
# 1st quantile = 8 means that 25% of the observations have less than 8 deaths
# 3rd quantile = 31 means that 75% of the observations have less than 31 deaths
# Total observed deaths per nation (sum over that nation's counties)
deaths_by_country <- aggregate(deaths ~ nation, data = Mmmec, sum)
print(deaths_by_country)
##        nation deaths
## 1     Belgium    449
## 2   W.Germany   2949
## 3     Denmark    681
## 4      France   1495
## 5          UK   2179
## 6       Italy   1462
## 7     Ireland     67
## 8  Luxembourg     23
## 9 Netherlands    546
# uvb looks centred: its overall mean is ~0 (0.000204 in the output below)
summary(Mmmec$uvb)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -8.900200 -4.158400 -0.886400  0.000204  3.275525 13.359000
# Explorative analysis

# Some histograms of the distribution of deaths and expected deaths

# Only deaths
ggplot(Mmmec, aes(x = deaths)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  ggtitle("Distribution of Deaths")

# Use the full argument name `filename` — passing `file=` only works through
# partial argument matching
ggsave(filename = "images/distribution_deaths.pdf", width = 18, height = 10.5)

# Only expected deaths
ggplot(Mmmec, aes(x = expected)) +
  geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
  ggtitle("Distribution of Expected Deaths")

ggsave(filename = "images/distribution_expecteddeaths.pdf", width = 18, height = 10.5)

# Both deaths and expected deaths
# Gather the data into a long format using pivot_longer
Mmmec_long <- Mmmec %>%
  pivot_longer(cols = c(deaths, expected), names_to = "type", values_to = "value")
# Create an overlapped (position = "identity") histogram with transparency (alpha = 0.6)
ggplot(Mmmec_long, aes(x = value, fill = type)) +
  geom_histogram(binwidth = 1, alpha = 0.6, position = "identity", color = "black") +
  ggtitle("Distribution of Deaths and Expected Deaths") +
  xlab("Deaths / Expected Deaths") +
  scale_fill_manual(values = c("deaths" = "red", "expected" = "yellow"),
                    labels = c("Deaths", "Expected Deaths"))

ggsave(filename = "images/distribution_deaths_expected.pdf", width = 18, height = 10.5)

# Some scatter plots of deaths VS UVB exposure

# With a single regression line pooled over all nations
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", color = "blue") +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/scatter_deaths_vs_uvb.pdf", width = 9.3, height = 7)

# With a regression line for each nation
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", se = FALSE) +  # Adds a regression line for each nation
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()

ggsave(filename = "images/scatter_deaths_vs_uvb_regression_by_nation.pdf", width = 9.3, height = 7)

# Or dividing by nation using facet_wrap, so that it is less chaotic
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  facet_wrap(~ nation) +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure by Nation") +
  theme(legend.position = "none")

ggsave(filename = "images/scatter_deaths_vs_uvb_by_nation.pdf", width = 8, height = 7)

# Boxplots of deaths by nation
# NOTE(review): fill = custom_palette outside aes() assumes exactly 9 boxes
# (one per nation level) — confirm if the data or factor levels ever change
ggplot(Mmmec, aes(x = nation, y = deaths)) +
  geom_boxplot(fill = custom_palette) +
  ggtitle("Deaths across counties in nations")

ggsave(filename = "images/boxplot_deaths_by_nation.pdf", width = 8, height = 7)


# Generalized linear mixed model with frequentist approach (in particular ML approach).
# Arguments are named explicitly instead of being matched by position.
M1 <- glmer(deaths ~ uvb + (1 | region) + (1 | nation),
            data = Mmmec, family = poisson, offset = log(expected))
# (1 | k) includes varying intercepts for each k
# log(expected) is an offset term to adjust the model to account for the expected number of deaths
summary(M1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: deaths ~ uvb + (1 | region) + (1 | nation)
##    Data: Mmmec
##  Offset: log(expected)
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    2198.7    2214.2   -1095.3    2190.7       350 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9440 -0.7788 -0.0071  0.6277  3.9102 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  region (Intercept) 0.04829  0.2198  
##  nation (Intercept) 0.13708  0.3702  
## Number of obs: 354, groups:  region, 78; nation, 9
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.06398    0.13351  -0.479   0.6318  
## uvb         -0.02822    0.01139  -2.478   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## uvb 0.185
# Bayes approach relying on HMC sampling from the posterior distribution
# (rstanarm defaults: 4 chains, 2000 iterations each, default priors).
# NOTE(review): no `seed` argument is set, so posterior draws will differ
# between runs — consider adding one for reproducibility.
M1.rstanarm <- stan_glmer(deaths ~ uvb + (1 | region) + (1 | nation),
                          data = Mmmec, family = poisson, offset = log(expected))
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.001306 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 13.06 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 6.543 seconds (Warm-up)
## Chain 1:                5.267 seconds (Sampling)
## Chain 1:                11.81 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 5.7e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.57 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 6.965 seconds (Warm-up)
## Chain 2:                5.254 seconds (Sampling)
## Chain 2:                12.219 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 4.7e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.47 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 7.145 seconds (Warm-up)
## Chain 3:                5.21 seconds (Sampling)
## Chain 3:                12.355 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'count' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 5.1e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.51 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 7.386 seconds (Warm-up)
## Chain 4:                5.11 seconds (Sampling)
## Chain 4:                12.496 seconds (Total)
## Chain 4:
# Compact summary: posterior medians and MAD-SDs of the fixed effects
# plus the group-level standard deviations
print(M1.rstanarm)
## stan_glmer
##  family:       poisson [log]
##  formula:      deaths ~ uvb + (1 | region) + (1 | nation)
##  observations: 354
## ------
##             Median MAD_SD
## (Intercept) -0.1    0.2  
## uvb          0.0    0.0  
## 
## Error terms:
##  Groups Name        Std.Dev.
##  region (Intercept) 0.23    
##  nation (Intercept) 0.48    
## Num. levels: region 78, nation 9 
## 
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg
# Full summary: every group-level effect plus MCMC diagnostics (mcse, Rhat, n_eff)
summary(M1.rstanarm)
## 
## Model Info:
##  function:     stan_glmer
##  family:       poisson [log]
##  formula:      deaths ~ uvb + (1 | region) + (1 | nation)
##  algorithm:    sampling
##  sample:       4000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 354
##  groups:       region (78), nation (9)
## 
## Estimates:
##                                         mean   sd   10%   50%   90%
## (Intercept)                           -0.1    0.2 -0.3  -0.1   0.1 
## uvb                                    0.0    0.0  0.0   0.0   0.0 
## b[(Intercept) region:1]                0.4    0.1  0.2   0.4   0.6 
## b[(Intercept) region:2]                0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:3]               -0.5    0.1 -0.6  -0.5  -0.3 
## b[(Intercept) region:4]               -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:5]                0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:6]                0.4    0.1  0.2   0.4   0.5 
## b[(Intercept) region:7]                0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:8]                0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:9]               -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:10]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:11]              -0.3    0.1 -0.4  -0.3  -0.2 
## b[(Intercept) region:12]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:13]              -0.2    0.1 -0.4  -0.2   0.0 
## b[(Intercept) region:14]               0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:15]               0.2    0.1  0.0   0.2   0.4 
## b[(Intercept) region:16]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:17]              -0.1    0.1 -0.3  -0.1   0.0 
## b[(Intercept) region:18]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:19]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:20]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:21]              -0.2    0.1 -0.4  -0.2   0.0 
## b[(Intercept) region:22]               0.0    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:23]               0.2    0.1  0.1   0.2   0.3 
## b[(Intercept) region:24]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:25]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:27]               0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:28]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:29]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:30]               0.1    0.1 -0.1   0.1   0.3 
## b[(Intercept) region:31]               0.1    0.2 -0.1   0.1   0.3 
## b[(Intercept) region:32]              -0.2    0.1 -0.3  -0.2   0.0 
## b[(Intercept) region:33]               0.0    0.1 -0.2   0.0   0.1 
## b[(Intercept) region:34]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:35]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:36]              -0.4    0.1 -0.6  -0.4  -0.2 
## b[(Intercept) region:37]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:38]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:39]               0.1    0.1  0.0   0.1   0.2 
## b[(Intercept) region:40]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:41]               0.0    0.1 -0.1   0.0   0.2 
## b[(Intercept) region:42]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:43]              -0.2    0.1 -0.3  -0.2  -0.1 
## b[(Intercept) region:44]               0.3    0.1  0.2   0.3   0.4 
## b[(Intercept) region:45]               0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:46]              -0.1    0.1 -0.2  -0.1   0.1 
## b[(Intercept) region:47]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:48]               0.1    0.1 -0.1   0.1   0.2 
## b[(Intercept) region:49]              -0.1    0.1 -0.2  -0.1   0.0 
## b[(Intercept) region:50]              -0.2    0.1 -0.4  -0.2  -0.1 
## b[(Intercept) region:51]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:52]              -0.1    0.2 -0.3  -0.1   0.1 
## b[(Intercept) region:53]              -0.4    0.2 -0.6  -0.4  -0.2 
## b[(Intercept) region:54]              -0.6    0.1 -0.7  -0.6  -0.4 
## b[(Intercept) region:55]               0.2    0.1  0.1   0.2   0.3 
## b[(Intercept) region:56]               0.4    0.1  0.2   0.4   0.5 
## b[(Intercept) region:57]               0.2    0.1  0.0   0.2   0.3 
## b[(Intercept) region:58]               0.2    0.1  0.1   0.2   0.4 
## b[(Intercept) region:59]               0.0    0.1 -0.1   0.0   0.1 
## b[(Intercept) region:60]               0.0    0.1 -0.2   0.0   0.1 
## b[(Intercept) region:61]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:62]               0.3    0.1  0.1   0.3   0.4 
## b[(Intercept) region:63]              -0.2    0.1 -0.4  -0.2  -0.1 
## b[(Intercept) region:64]              -0.1    0.1 -0.3  -0.1   0.1 
## b[(Intercept) region:65]              -0.2    0.1 -0.4  -0.2   0.0 
## b[(Intercept) region:66]               0.2    0.1  0.0   0.2   0.3 
## b[(Intercept) region:67]               0.1    0.2 -0.1   0.1   0.3 
## b[(Intercept) region:68]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:69]               0.1    0.2 -0.1   0.1   0.4 
## b[(Intercept) region:70]               0.2    0.1  0.0   0.2   0.3 
## b[(Intercept) region:71]              -0.1    0.2 -0.4  -0.1   0.1 
## b[(Intercept) region:72]               0.0    0.2 -0.2   0.0   0.2 
## b[(Intercept) region:73]               0.0    0.2 -0.3   0.0   0.2 
## b[(Intercept) region:74]               0.0    0.2 -0.3   0.0   0.3 
## b[(Intercept) region:75]               0.0    0.2 -0.3   0.0   0.3 
## b[(Intercept) region:76]              -0.1    0.1 -0.3  -0.1   0.1 
## b[(Intercept) region:77]               0.0    0.1 -0.2   0.0   0.2 
## b[(Intercept) region:78]               0.2    0.1  0.1   0.2   0.4 
## b[(Intercept) region:79]              -0.1    0.1 -0.3  -0.1   0.0 
## b[(Intercept) nation:Belgium]         -0.1    0.2 -0.3  -0.1   0.2 
## b[(Intercept) nation:W.Germany]        0.5    0.2  0.3   0.5   0.7 
## b[(Intercept) nation:Denmark]          0.6    0.2  0.4   0.6   0.9 
## b[(Intercept) nation:France]          -0.5    0.2 -0.7  -0.5  -0.3 
## b[(Intercept) nation:UK]              -0.1    0.2 -0.3  -0.1   0.1 
## b[(Intercept) nation:Italy]            0.0    0.2 -0.3   0.0   0.3 
## b[(Intercept) nation:Ireland]         -0.5    0.2 -0.8  -0.5  -0.2 
## b[(Intercept) nation:Luxembourg]       0.0    0.3 -0.3   0.0   0.4 
## b[(Intercept) nation:Netherlands]      0.1    0.2 -0.2   0.1   0.3 
## Sigma[region:(Intercept),(Intercept)]  0.1    0.0  0.0   0.1   0.1 
## Sigma[nation:(Intercept),(Intercept)]  0.2    0.2  0.1   0.2   0.4 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 27.8    0.4 27.3  27.8  28.3 
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                                       mcse Rhat n_eff
## (Intercept)                           0.0  1.0  1132 
## uvb                                   0.0  1.0  1793 
## b[(Intercept) region:1]               0.0  1.0  3496 
## b[(Intercept) region:2]               0.0  1.0  2772 
## b[(Intercept) region:3]               0.0  1.0  2885 
## b[(Intercept) region:4]               0.0  1.0  1671 
## b[(Intercept) region:5]               0.0  1.0  1585 
## b[(Intercept) region:6]               0.0  1.0  2352 
## b[(Intercept) region:7]               0.0  1.0  4713 
## b[(Intercept) region:8]               0.0  1.0  2735 
## b[(Intercept) region:9]               0.0  1.0  1927 
## b[(Intercept) region:10]              0.0  1.0  1793 
## b[(Intercept) region:11]              0.0  1.0  1557 
## b[(Intercept) region:12]              0.0  1.0  2130 
## b[(Intercept) region:13]              0.0  1.0  3902 
## b[(Intercept) region:14]              0.0  1.0  2285 
## b[(Intercept) region:15]              0.0  1.0  2530 
## b[(Intercept) region:16]              0.0  1.0  2595 
## b[(Intercept) region:17]              0.0  1.0  2465 
## b[(Intercept) region:18]              0.0  1.0  5406 
## b[(Intercept) region:19]              0.0  1.0  4226 
## b[(Intercept) region:20]              0.0  1.0  5968 
## b[(Intercept) region:21]              0.0  1.0  6408 
## b[(Intercept) region:22]              0.0  1.0  5847 
## b[(Intercept) region:23]              0.0  1.0  4410 
## b[(Intercept) region:24]              0.0  1.0  4358 
## b[(Intercept) region:25]              0.0  1.0  6760 
## b[(Intercept) region:27]              0.0  1.0  6407 
## b[(Intercept) region:28]              0.0  1.0  5122 
## b[(Intercept) region:29]              0.0  1.0  3586 
## b[(Intercept) region:30]              0.0  1.0  4497 
## b[(Intercept) region:31]              0.0  1.0  7952 
## b[(Intercept) region:32]              0.0  1.0  5685 
## b[(Intercept) region:33]              0.0  1.0  4922 
## b[(Intercept) region:34]              0.0  1.0  4500 
## b[(Intercept) region:35]              0.0  1.0  4731 
## b[(Intercept) region:36]              0.0  1.0  5650 
## b[(Intercept) region:37]              0.0  1.0  6754 
## b[(Intercept) region:38]              0.0  1.0  3021 
## b[(Intercept) region:39]              0.0  1.0  3152 
## b[(Intercept) region:40]              0.0  1.0  3493 
## b[(Intercept) region:41]              0.0  1.0  2759 
## b[(Intercept) region:42]              0.0  1.0  3032 
## b[(Intercept) region:43]              0.0  1.0  2149 
## b[(Intercept) region:44]              0.0  1.0  1715 
## b[(Intercept) region:45]              0.0  1.0  2355 
## b[(Intercept) region:46]              0.0  1.0  2341 
## b[(Intercept) region:47]              0.0  1.0  2295 
## b[(Intercept) region:48]              0.0  1.0  3042 
## b[(Intercept) region:49]              0.0  1.0  2167 
## b[(Intercept) region:50]              0.0  1.0  4527 
## b[(Intercept) region:51]              0.0  1.0  6553 
## b[(Intercept) region:52]              0.0  1.0  6710 
## b[(Intercept) region:53]              0.0  1.0  4171 
## b[(Intercept) region:54]              0.0  1.0  4299 
## b[(Intercept) region:55]              0.0  1.0  3345 
## b[(Intercept) region:56]              0.0  1.0  4770 
## b[(Intercept) region:57]              0.0  1.0  4030 
## b[(Intercept) region:58]              0.0  1.0  5258 
## b[(Intercept) region:59]              0.0  1.0  2330 
## b[(Intercept) region:60]              0.0  1.0  5870 
## b[(Intercept) region:61]              0.0  1.0  8353 
## b[(Intercept) region:62]              0.0  1.0  3113 
## b[(Intercept) region:63]              0.0  1.0  4517 
## b[(Intercept) region:64]              0.0  1.0  4910 
## b[(Intercept) region:65]              0.0  1.0  2993 
## b[(Intercept) region:66]              0.0  1.0  4218 
## b[(Intercept) region:67]              0.0  1.0  5987 
## b[(Intercept) region:68]              0.0  1.0  6836 
## b[(Intercept) region:69]              0.0  1.0  7666 
## b[(Intercept) region:70]              0.0  1.0  2826 
## b[(Intercept) region:71]              0.0  1.0  6139 
## b[(Intercept) region:72]              0.0  1.0  5247 
## b[(Intercept) region:73]              0.0  1.0  6022 
## b[(Intercept) region:74]              0.0  1.0  7826 
## b[(Intercept) region:75]              0.0  1.0  5134 
## b[(Intercept) region:76]              0.0  1.0  3391 
## b[(Intercept) region:77]              0.0  1.0  3109 
## b[(Intercept) region:78]              0.0  1.0  2822 
## b[(Intercept) region:79]              0.0  1.0  2852 
## b[(Intercept) nation:Belgium]         0.0  1.0  1568 
## b[(Intercept) nation:W.Germany]       0.0  1.0  1258 
## b[(Intercept) nation:Denmark]         0.0  1.0  1527 
## b[(Intercept) nation:France]          0.0  1.0  1218 
## b[(Intercept) nation:UK]              0.0  1.0  1272 
## b[(Intercept) nation:Italy]           0.0  1.0  1070 
## b[(Intercept) nation:Ireland]         0.0  1.0  1890 
## b[(Intercept) nation:Luxembourg]      0.0  1.0  2785 
## b[(Intercept) nation:Netherlands]     0.0  1.0  1459 
## Sigma[region:(Intercept),(Intercept)] 0.0  1.0  1393 
## Sigma[nation:(Intercept),(Intercept)] 0.0  1.0  1553 
## mean_PPD                              0.0  1.0  3965 
## log-posterior                         0.3  1.0   960 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Extract Leave-One-Out Cross-Validation (PSIS-LOO).
# The warning below flags 11 observations with pareto_k > 0.7, meaning the
# importance-sampling approximation is unreliable for them; loo itself
# recommends kfold(K = 10) as a more trustworthy alternative here.
loo_M1rs <- loo(M1.rstanarm)
## Warning: Found 11 observations with a pareto_k > 0.7. With this many problematic observations we recommend calling 'kfold' with argument 'K=10' to perform 10-fold cross-validation rather than LOO.
print(loo_M1rs)
## 
## Computed from 4000 by 354 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1066.6 25.6
## p_loo        74.9  8.3
## looic      2133.2 51.2
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.3, 2.3]).
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. ESS
## (-Inf, 0.7]   (good)     343   96.9%   252     
##    (0.7, 1]   (bad)        9    2.5%   <NA>    
##    (1, Inf)   (very bad)   2    0.6%   <NA>    
## See help('pareto-k-diagnostic') for details.
# Posterior predictive check of reproduced data:
# y_rep has one row per posterior draw, one column per observation
y_rep <- posterior_predict(M1.rstanarm)

# densities of the observed outcome overlaid with replicated outcomes
pp_check(M1.rstanarm)

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/densities_M1rstanarm.pdf", width = 8, height = 7)

# Plot the credible intervals

# Parameter labels for the credible-interval plot (parsed later as plotmath).
beta_names <- c(paste0("beta^", c("uvb")), "gl.intercept")
# Region labels cover regions 1-25 and 27-79 (region 26 is absent from the
# data), followed by the nine nation labels and the two group-level sigmas.
# Built vectorized instead of growing the vector one element at a time in
# loops; paste0() on a single string was a no-op in the original.
alpha_names <- c(
  paste0("region[", c(1:25, 27:79), "]"),
  "Belgium", "WG", "Denmark", "France", "UK", "Italy",
  "Ireland", "Luxembourg", "Netherlands",
  "sigma[region]", "sigma[nation]"
)
# Draws matrix: one column per parameter, one row per posterior draw
posterior_M1 <- as.matrix(M1.rstanarm)
mcmc_intervals(posterior_M1, regex_pars = c("uvb", "(Intercept)", "b")) +
  xaxis_text(on = TRUE, size = rel(1.9)) +
  yaxis_text(on = TRUE, size = rel(1.4)) +
  # Relabel parameters with plotmath labels; replacing the discrete y scale
  # triggers a harmless "Scale for y is already present" message
  scale_y_discrete(labels = parse(text = c(beta_names, alpha_names)))

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/logistic_credible_intervals.pdf", width = 5,
       height = length(alpha_names) * 0.25)

# Plot the posterior marginal densities along with 50% intervals for the 'fixed-effects' uvb
mcmc_areas(posterior_M1, regex_pars = c("uvb")) +
  xaxis_text(on = TRUE, size = rel(1.9)) +
  yaxis_text(on = TRUE, size = rel(1.4))

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/logistic_fixed_effects.pdf", width = 8, height = 7)

# Plot the random effects (posterior mean +- s.e.), regions ordered by intercept size.
# sort(..., index.return = TRUE) is called once and reused instead of twice.
ord_info <- sort(coef(M1.rstanarm)$region[, 1], index.return = TRUE)
int_ord <- ord_info$x   # region-level intercepts, ascending
ord <- ord_info$ix      # permutation that produced that order
region.abbr <- levels(Mmmec$region)
region.abbr.ord <- region.abbr[ord]
# NOTE(review): M1.rstanarm$ses holds standard errors for all parameters;
# confirm its leading entries line up with the region effects before
# trusting these error bars
se_ord <- M1.rstanarm$ses[ord]
par(xaxt = "n", mfrow = c(1, 1), mar = c(5, 2, 2, 1))
plot(seq_along(int_ord), int_ord, ylim = c(-1.4, 1.4), pch = 19, bg = 2, xlab = "Regions",
     ylab = "Intercepts", cex.main = 1.9, cex.lab = 1.9)
for (h in seq_along(int_ord)) {
  segments(h, int_ord[h] - se_ord[h], h, int_ord[h] + se_ord[h], col = "red")
  # Alternate label placement above/below the interval to reduce overlap
  if (h %% 2 == 0) {
    text(h, int_ord[h] + se_ord[h] + 0.1, region.abbr.ord[h], cex = 1.1)
  } else {
    text(h, int_ord[h] - se_ord[h] - 0.1, region.abbr.ord[h], cex = 1.1)
  }
}

# BUG FIX: this figure is base graphics, so ggsave() would silently save the
# last *ggplot* drawn (the mcmc_areas plot) instead. Copy the current device
# to a PDF instead.
dev.copy(pdf, "images/random_effects_log.pdf",
         height = 7, width = length(int_ord) * 0.4)
dev.off()

# Empirical distribution function of y overlaid with 200 posterior predictive draws
ppc_ecdf_overlay(y = M1.rstanarm$y, y_rep[1:200, ]) +
  xaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/ecdf_M1.rstanarm.pdf", width = 8, height = 7)

# Proportion of zeros in y vs in each replicated dataset.
# ppc_stat() looks the statistic up by name, so the string "prop_zero"
# must match this function's name in the calling environment.
prop_zero <- function(x) mean(x == 0)
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat = "prop_zero") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))

# `filename` spelled out — `file=` relied on partial argument matching
ggsave(filename = "images/proportion_zero_M1.rstanarm.pdf", width = 8, height = 7)
# Posterior predictive checks on summary statistics (mean, sd, median, max).
# The four original blocks were identical except for the statistic name and
# output file, so the plot-and-save logic is factored into one helper.

# Build the ppc_stat plot for `stat_name`, display it, and save it to `outfile`.
ppc_stat_and_save <- function(stat_name, outfile) {
  p <- ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat = stat_name) +
    xaxis_text(on = TRUE, size = 22) +
    theme(axis.title.x = element_text(size = 22)) +
    legend_text(size = rel(1.6))
  print(p)  # render it, as the original top-level expressions did
  ggsave(filename = outfile, plot = p, width = 5, height = 5)
}

ppc_stat_and_save("mean",   "images/mean_M1.rstanarm.pdf")
ppc_stat_and_save("sd",     "images/sd_M1.rstanarm.pdf")
ppc_stat_and_save("median", "images/median_M1.rstanarm.pdf")
ppc_stat_and_save("max",    "images/max_M1.rstanarm.pdf")
# Standardized residuals under the Poisson assumption:
# (y - E[y_rep]) / sqrt(E[y_rep]); roughly 95% should fall inside [-2, 2].
mean_y_rep <- colMeans(y_rep)
std_resid <- (M1.rstanarm$y - mean_y_rep) / sqrt(mean_y_rep)
# qplot() is deprecated since ggplot2 3.4.0 (the knit log showed the warning);
# use an explicit ggplot() + geom_point() instead.
p_resid_M1 <- ggplot(
  data.frame(mean_y_rep = mean_y_rep, std_resid = std_resid),
  aes(x = mean_y_rep, y = std_resid)
) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x = "Mean of y_rep", y = "Standardized residuals") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16))
p_resid_M1
ggsave(filename = "images/standardized_residuals_M1.rstanarm.pdf",
       plot = p_resid_M1, width = 8, height = 7)

# Predictive intervals for the rstanarm fit.
# NOTE(review): with x = NULL the intervals are plotted against the
# observation index, so the x-axis label "Deaths" looks wrong — confirm the
# intended axis labels (kept as in the original here).
p_int_M1 <- ppc_intervals(
  y = M1.rstanarm$y,
  yrep = y_rep,
  x = NULL # no county-level covariate here; the next Stan model also used NULL
) +
  labs(x = "Deaths", y = "Expected deaths") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))
p_int_M1
ggsave(filename = "images/predictive_intervals_M1.rstanarm.pdf",
       plot = p_int_M1, width = 8, height = 7)



# Stan model A of the paper: a variance-components model with UVBI in the
# fixed part and random intercepts associated with each level —
# s_k ~ normal(0, sigma_s) (nation, level 3),
# u_jk ~ normal(0, sigma_u) (region, level 2), e_ijk (county, level 1).

# Nation-level UVBI summary statistics transcribed from Table III of the paper.
# NOTE(review): this assumes unique(Mmmec$nation) returns the nine nations in
# the same order as the Table III rows — verify against the paper.
uvbi_params <- data.frame(
  nation = unique(Mmmec$nation),  # c() wrapper around unique() was redundant
  mean_UVBI = c(12.70, 12.79, 9.96, 17.18, 10.91, 21.45, 10.54, 13.26, 11.40),
  sd_UVBI = c(0.29, 1.35, 0.38, 2.59, 1.50, 3.51, 0.60, 0.05, 0.47),
  min_UVBI = c(12.17, 10.45, 9.47, 12.92, 6.69, 16.83, 9.64, 13.22, 10.62),
  max_UVBI = c(13.10, 15.15, 10.49, 23.24, 13.46, 28.95, 11.70, 13.31, 11.94)
)
# Join the nation-level UVBI parameters onto the county-level data
Mmmec2 <- left_join(Mmmec, uvbi_params, by = "nation")
# Simulate a county-level UVBI from a truncated normal bounded by
# [min_UVBI, max_UVBI]. set.seed() makes the simulated covariate reproducible
# (the Stan fit below also uses seed = 123); rtruncnorm() is namespace-
# qualified so the dependency on the truncnorm package is explicit.
set.seed(123)
Mmmec2 <- Mmmec2 %>%
  mutate(UVBI = truncnorm::rtruncnorm(n(), a = min_UVBI, b = max_UVBI,
                                      mean = mean_UVBI, sd = sd_UVBI))
# Data list passed to the Stan program
stan_data <- list(
  N = nrow(Mmmec2),                          # number of counties (level 1)
  deaths = Mmmec2$deaths,                    # observed death counts
  expected = Mmmec2$expected,                # expected deaths
  K = length(unique(Mmmec2$nation)),         # number of nations (level 3)
  J = length(unique(Mmmec2$region)),         # number of regions (level 2)
  k = as.integer(as.factor(Mmmec2$nation)),  # nation index per county
  j = as.integer(as.factor(Mmmec2$region)),  # region index per county
  UVBI = Mmmec2$UVBI                         # simulated UVB index covariate
)
# Compile the Stan program for model A.
# NOTE(review): the knitted log below shows the C toolchain smoke test failing
# ('cmath' file not found) even though sampling ran afterwards — presumably a
# previously compiled model was reused; verify the local C++ toolchain setup.
comp_model_A <- stan_model('poisson_regression.stan')
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 16.0.0 (clang-1600.0.26.6)’
## using SDK: ‘’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
# Fit four chains (rstan default). The knitted warnings below report low BFMI
# and low bulk/tail ESS at the default iter = 2000 and advise running more
# iterations, so the post-warmup sample is enlarged here (same seed).
fit_model_A <- sampling(comp_model_A, data = stan_data, seed = 123,
                        iter = 4000, warmup = 1000)
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000117 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.17 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 6.414 seconds (Warm-up)
## Chain 1:                3.599 seconds (Sampling)
## Chain 1:                10.013 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 4.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 6.511 seconds (Warm-up)
## Chain 2:                4.635 seconds (Sampling)
## Chain 2:                11.146 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 4.4e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 6.797 seconds (Warm-up)
## Chain 3:                5.38 seconds (Sampling)
## Chain 3:                12.177 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 5.784 seconds (Warm-up)
## Chain 4:                5.319 seconds (Sampling)
## Chain 4:                11.103 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
# Posterior summary of the fixed effects (beta0, beta1) and the three
# variance components. All Rhat <= 1.02 in the printed output; sigma_e has
# the smallest n_eff (162), consistent with the low-BFMI warning above.
print(fit_model_A, pars = c('beta0','beta1','sigma_s','sigma_u','sigma_e'))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##          mean se_mean   sd  2.5%   25%   50%  75% 97.5% n_eff Rhat
## beta0    0.05    0.01 0.20 -0.35 -0.07  0.06 0.18  0.45   775 1.00
## beta1   -0.01    0.00 0.01 -0.02 -0.01 -0.01 0.00  0.01  3702 1.00
## sigma_s  0.49    0.00 0.16  0.27  0.38  0.46 0.56  0.88  1700 1.00
## sigma_u  0.23    0.00 0.03  0.18  0.21  0.23 0.25  0.29  1381 1.00
## sigma_e  0.12    0.00 0.02  0.08  0.10  0.12 0.13  0.16   162 1.02
## 
## Samples were drawn using NUTS(diag_e) at Sat Jun 14 18:33:57 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Posterior predictive draws: one row per posterior draw, one column per county
y_rep_model_A <- as.matrix(fit_model_A, pars = "y_rep")


# Posterior predictive checking for model A.
# Each plot is stored in a variable and passed explicitly to ggsave()
# (plot = ...), instead of relying on ggplot2's implicit last_plot();
# `filename` is spelled out instead of partially matched `file=`.

# Traceplot of the 4 chains to check mixing
p_trace <- traceplot(fit_model_A,
                     pars = c('beta0', 'beta1', 'sigma_s', 'sigma_u', 'sigma_e'))
p_trace
ggsave(filename = "images/traceplot.pdf", plot = p_trace, width = 8, height = 7)

# Density overlay: observed deaths vs. 200 replicated datasets
p_dens_A <- ppc_dens_overlay(y = stan_data$deaths, y_rep_model_A[1:200, ]) +
  xaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))
p_dens_A
ggsave(filename = "images/densities_model_A.pdf", plot = p_dens_A,
       width = 8, height = 7)

# Empirical CDF overlay: observed deaths vs. 200 replicated datasets
p_ecdf_A <- ppc_ecdf_overlay(y = stan_data$deaths, y_rep_model_A[1:200, ]) +
  xaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))
p_ecdf_A
ggsave(filename = "images/ecdf_model_A.pdf", plot = p_ecdf_A,
       width = 8, height = 7)

# PPC: proportion of zeros for model A. prop_zero() is already defined
# earlier in this script, so the duplicate definition is dropped; the plot
# is saved explicitly instead of via the implicit last_plot().
p_zero_A <- ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A,
                     stat = "prop_zero") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))
p_zero_A
ggsave(filename = "images/proportion_zero_model_A.pdf", plot = p_zero_A,
       width = 8, height = 7)
# PPC summary statistics (mean, sd, median, max) for model A.
# The original repeated the same 8-line plot-and-save pattern four times;
# one helper removes the duplication and saves each plot explicitly
# (plot = p), using the full `filename` argument.
save_ppc_stat_A <- function(stat_name, out_file) {
  # Build the ppc_stat plot for the given statistic, display it, save it.
  p <- ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat = stat_name) +
    xaxis_text(on = TRUE, size = 22) +
    theme(axis.title.x = element_text(size = 22)) +
    legend_text(size = rel(1.6))
  print(p)
  ggsave(filename = out_file, plot = p, width = 5, height = 5)
  invisible(p)
}
save_ppc_stat_A("mean",   "images/mean_model_A.pdf")
save_ppc_stat_A("sd",     "images/sd_model_A.pdf")
save_ppc_stat_A("median", "images/median_model_A.pdf")
save_ppc_stat_A("max",    "images/max_model_A.pdf")
# Standardized residuals for model A under the Poisson assumption:
# (y - E[y_rep]) / sqrt(E[y_rep]); roughly 95% should fall inside [-2, 2].
mean_y_rep_model_A <- colMeans(y_rep_model_A)
std_resid <- (stan_data$deaths - mean_y_rep_model_A) / sqrt(mean_y_rep_model_A)
# qplot() is deprecated since ggplot2 3.4.0; use ggplot() + geom_point()
p_resid_A <- ggplot(
  data.frame(mean_y_rep_model_A = mean_y_rep_model_A, std_resid = std_resid),
  aes(x = mean_y_rep_model_A, y = std_resid)
) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x = "Mean of y_rep", y = "Standardized residuals") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16))
p_resid_A
ggsave(filename = "images/standardized_residuals_model_A.pdf",
       plot = p_resid_A, width = 8, height = 7)

# Predictive intervals for model A against the UVB index.
# BUG FIX: stan_data has no element named 'uvb' (the covariate is 'UVBI'),
# so stan_data$uvb evaluated to NULL and the x mapping was silently dropped.
# The axis labels are corrected to match the mapped variables
# (x = UVB index, y = death counts).
p_int_A <- ppc_intervals(
  y = stan_data$deaths,
  yrep = y_rep_model_A,
  x = stan_data$UVBI
) +
  labs(x = "UVB index", y = "Deaths") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))
p_int_A
ggsave(filename = "images/predictive_intervals_model_A.pdf",
       plot = p_int_A, width = 8, height = 7)

# Leave-One-Out Cross-Validation via PSIS (loo package).
# The Stan program stores the pointwise log-likelihood, extracted here.
log_lik_A <- extract_log_lik(fit_model_A)
# Without r_eff, loo() assumes independent draws (r_eff = 1) — the printed
# note below says exactly that — which makes the MCSE/ESS estimates
# optimistic for MCMC output. Compute r_eff from the chain-structured
# (unmerged) log-likelihood array and pass it in.
ll_array_A <- extract_log_lik(fit_model_A, merge_chains = FALSE)
r_eff_A <- relative_eff(exp(ll_array_A))
loo_A <- loo(log_lik_A, r_eff = r_eff_A)
# The Pareto-k warning flags ~10% of observations whose PSIS estimates are
# unreliable; consider K-fold CV or moment matching for those points.
print(loo_A)
## 
## Computed from 4000 by 354 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1051.0 20.7
## p_loo       110.2  8.8
## looic      2102.0 41.4
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume independent draws (r_eff=1).
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. ESS
## (-Inf, 0.7]   (good)     320   90.4%   87      
##    (0.7, 1]   (bad)       31    8.8%   <NA>    
##    (1, Inf)   (very bad)   3    0.8%   <NA>    
## See help('pareto-k-diagnostic') for details.